scenario-notebooks/Tools/PerfTools_Log Analytics_CustomTable_Setup.ipynb (727 lines of code) (raw):

{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Performance Tools - Log Analytics Custom Table Setup\n",
    "\n",
    "__Notebook Version:__ 1.0<br>\n",
    "__Python Version:__ Python 3.8<br>\n",
    "__Apache Spark Version:__ 3.1<br>\n",
    "__Required Packages:__ azure-identity, azure-mgmt-resource, azure-monitor-query<br>\n",
    "__Platforms Supported:__ Azure Synapse Analytics\n",
    "\n",
    "__Data Source Required:__ No\n",
    "\n",
    "### Description\n",
    "This notebook creates a data collection endpoint, a custom table, and a data collection rule for Azure Log Analytics.<br>\n",
    "*** Please run the cells sequentially to avoid errors. Please do not use \"run all cells\". *** <br>\n",
    "\n",
    "## Table of Contents\n",
    "1. Warm-up\n",
    "2. Azure Authentication\n",
    "3. Create Data Collection Endpoint (DCE)\n",
    "4. Create Custom Table\n",
    "5. Create Data Collection Rule (DCR)"
   ],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 1. Warm-up"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "# Install every package this notebook imports; requirements that are already satisfied are left untouched by pip.\n",
    "%pip install azure-monitor-query azure-identity azure-mgmt-resource"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "from datetime import datetime, timezone, timedelta\n",
    "import json\n",
    "\n",
    "from azure.core.exceptions import HttpResponseError\n",
    "from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential\n",
    "from azure.mgmt.resource import ResourceManagementClient\n",
    "from azure.mgmt.resource.resources.models import DeploymentMode\n",
    "from IPython.display import display, HTML, Markdown"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "# User Inputs section 1\n",
    "tenant_id = \"\"\n",
    "subscription_id = \"\"\n",
    "workspace_id = \"\"\n",
    "\n",
    "# Azure KV for accessing service principal info\n",
    "akv_name = \"\"\n",
    "client_id_name = \"\"\n",
    "client_secret_name = \"\"\n",
    "akv_link_name = \"\"\n",
    "\n",
    "# User Inputs section 2\n",
    "# Parameters for provisioning resources\n",
    "resource_group_name = \"\"\n",
    "location = \"\"\n",
    "workspace_name = \"\"\n",
    "data_collection_endpoint_name = \"\"\n",
    "data_collection_rule_name = \"\"\n",
    "# Log Analytics custom table names must end with the _CL suffix.\n",
    "custom_table_name = \"\"\n",
    "\n",
    "# Derived values - no need to edit below this line.\n",
    "workspace_resource_id = \"/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.OperationalInsights/workspaces/{2}\".format(subscription_id, resource_group_name, workspace_name)\n",
    "# Stream name used by the DCR for this table (same value as the 'streamName' output in section 4).\n",
    "custom_table_full_name = \"Custom-\" + custom_table_name"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 2. Azure Authentication"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "# You may need to change resource_uri for various cloud environments.\n",
    "resource_uri = \"https://api.loganalytics.io\"\n",
    "\n",
    "# The service principal id and secret are read from Azure Key Vault via mssparkutils.\n",
    "client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)\n",
    "client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)\n",
    "\n",
    "credential = ClientSecretCredential(\n",
    "    tenant_id=tenant_id,\n",
    "    client_id=client_id,\n",
    "    client_secret=client_secret)\n",
    "\n",
    "# Requesting a token here surfaces bad credentials now instead of in the middle of a deployment.\n",
    "access_token = credential.get_token(resource_uri + \"/.default\")\n",
    "token = access_token.token"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Deployment helpers\n",
    "Sections 3 to 5 each deploy one ARM template into the resource group and then read the deployment outputs. The two helpers below hold that shared logic."
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "# Seconds each status cell waits for its deployment before asking you to re-run the cell.\n",
    "DEPLOYMENT_WAIT_SECONDS = 120\n",
    "\n",
    "# One management client is shared by all deployments in this notebook.\n",
    "resource_client = ResourceManagementClient(credential, subscription_id)\n",
    "\n",
    "\n",
    "def begin_template_deployment(deployment_name, template, parameters):\n",
    "    \"\"\"Start an incremental ARM template deployment in `resource_group_name`.\n",
    "\n",
    "    deployment_name: name of the ARM deployment to create or update.\n",
    "    template: the ARM template as a parsed dict (result of json.loads).\n",
    "    parameters: dict mapping template parameter names to plain values.\n",
    "\n",
    "    Returns the poller from `deployments.begin_create_or_update`.\n",
    "    \"\"\"\n",
    "    # ARM expects every parameter wrapped as {\"value\": ...}.\n",
    "    arm_parameters = {name: {\"value\": value} for name, value in parameters.items()}\n",
    "    return resource_client.deployments.begin_create_or_update(\n",
    "        resource_group_name,\n",
    "        deployment_name,\n",
    "        {\n",
    "            \"properties\": {\n",
    "                \"template\": template,\n",
    "                \"parameters\": arm_parameters,\n",
    "                \"mode\": DeploymentMode.incremental\n",
    "            }\n",
    "        }\n",
    "    )\n",
    "\n",
    "\n",
    "def get_deployment_outputs(poller, wait_seconds=DEPLOYMENT_WAIT_SECONDS):\n",
    "    \"\"\"Return the outputs of a succeeded deployment, or None if it failed or is still running.\n",
    "\n",
    "    poller: the object returned by `begin_template_deployment`.\n",
    "    wait_seconds: maximum time to wait for the deployment before giving up for now.\n",
    "\n",
    "    When None is returned, the reason is printed. If the deployment is still running,\n",
    "    simply run the calling cell again.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        poller.wait(wait_seconds)\n",
    "    except HttpResponseError as error:\n",
    "        # The deployment itself failed; show why instead of only the bare status.\n",
    "        print(poller.status())\n",
    "        print('Deployment failed: ' + str(error))\n",
    "        return None\n",
    "\n",
    "    if poller.status() != \"Succeeded\":\n",
    "        print(poller.status())\n",
    "        print('Run the cell until status=Succeeded or when you see Failed.')\n",
    "        return None\n",
    "\n",
    "    return poller.result().properties.outputs"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 3. Create Data Collection Endpoint (DCE)"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "# ARM template for the Data Collection Endpoint.\n",
    "# Outputs: the endpoint resource id and its logsIngestion object.\n",
    "dce_json_string = \"\"\"\n",
    "{\n",
    "    \"$schema\": \"https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#\",\n",
    "    \"contentVersion\": \"1.0.0.0\",\n",
    "    \"parameters\": {\n",
    "        \"dataCollectionEndpointName\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"Specifies the name of the Data Collection Endpoint to create.\"\n",
    "            }\n",
    "        },\n",
    "        \"location\": {\n",
    "            \"type\": \"string\",\n",
    "            \"defaultValue\": \"eastus\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"Specifies the location for the Data Collection Endpoint.\"\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "    \"resources\": [\n",
    "        {\n",
    "            \"type\": \"Microsoft.Insights/dataCollectionEndpoints\",\n",
    "            \"name\": \"[parameters('dataCollectionEndpointName')]\",\n",
    "            \"location\": \"[parameters('location')]\",\n",
    "            \"apiVersion\": \"2021-04-01\",\n",
    "            \"properties\": {\n",
    "                \"networkAcls\": {\n",
    "                    \"publicNetworkAccess\": \"Enabled\"\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "    ],\n",
    "    \"outputs\": {\n",
    "        \"dataCollectionEndpointId\": {\n",
    "            \"type\": \"string\",\n",
    "            \"value\": \"[resourceId('Microsoft.Insights/dataCollectionEndpoints', parameters('dataCollectionEndpointName'))]\"\n",
    "        },\n",
    "        \"endpoint\": {\n",
    "            \"type\": \"object\",\n",
    "            \"value\": \"[reference(resourceId('Microsoft.Insights/dataCollectionEndpoints', parameters('dataCollectionEndpointName'))).logsIngestion]\"\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\"\"\""
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "template_body = json.loads(dce_json_string)\n",
    "\n",
    "rg_deployment_result = begin_template_deployment(\n",
    "    \"exampleDeployment\",\n",
    "    template_body,\n",
    "    {\n",
    "        \"location\": location,\n",
    "        \"dataCollectionEndpointName\": data_collection_endpoint_name\n",
    "    }\n",
    ")"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "dce_resource_id = ''\n",
    "dce_endpoint = ''\n",
    "\n",
    "dce_outputs = get_deployment_outputs(rg_deployment_result)\n",
    "if dce_outputs is not None:\n",
    "    dce_resource_id = dce_outputs[\"dataCollectionEndpointId\"].get(\"value\")\n",
    "    # The 'endpoint' output is the logsIngestion object; its 'endpoint' key holds the ingestion URL.\n",
    "    dce_endpoint = dce_outputs[\"endpoint\"].get(\"value\")['endpoint']\n",
    "    print('You will need DCE Endpoint for future data ingestion!')\n",
    "    print('DCE Endpoint: ' + dce_endpoint)"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 4. Create Custom Table"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "# Please replace columns info with your own columns\n",
    "# The DCR template in section 5 declares the same columns; keep the two lists in sync.\n",
    "cus_table_json_string = \"\"\"\n",
    "{\n",
    "    \"$schema\": \"https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#\",\n",
    "    \"contentVersion\": \"1.0.0.0\",\n",
    "    \"parameters\": {\n",
    "        \"workspaceName\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"LA workspace name.\"\n",
    "            }\n",
    "        },\n",
    "        \"customTableName\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"table name.\"\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "    \"resources\": [\n",
    "        {\n",
    "            \"type\": \"Microsoft.OperationalInsights/workspaces/tables\",\n",
    "            \"apiVersion\": \"2021-12-01-preview\",\n",
    "            \"name\": \"[concat(parameters('workspaceName'), '/', parameters('customTableName'))]\",\n",
    "            \"kind\": \"CustomLog\",\n",
    "            \"properties\": {\n",
    "                \"totalRetentionInDays\": 90,\n",
    "                \"plan\": \"Analytics\",\n",
    "                \"schema\": {\n",
    "                    \"name\": \"[parameters('customTableName')]\",\n",
    "                    \"columns\": [\n",
    "                        {\n",
    "                            \"name\": \"TimeGenerated\",\n",
    "                            \"type\": \"datetime\"\n",
    "                        },\n",
    "                        {\n",
    "                            \"name\": \"TimeInSeconds\",\n",
    "                            \"type\": \"real\"\n",
    "                        },\n",
    "                        {\n",
    "                            \"name\": \"QueryBody\",\n",
    "                            \"type\": \"string\"\n",
    "                        }\n",
    "                    ]\n",
    "                },\n",
    "                \"retentionInDays\": 90\n",
    "            }\n",
    "        }\n",
    "    ],\n",
    "    \"outputs\": {\n",
    "        \"streamName\": {\n",
    "            \"type\": \"string\",\n",
    "            \"value\": \"[concat('Custom-', parameters('customTableName'))]\"\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\"\"\""
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "template_body = json.loads(cus_table_json_string)\n",
    "table_tag = \"defaultct\"  # name of the ARM deployment that creates the table\n",
    "\n",
    "rg_deployment_result = begin_template_deployment(\n",
    "    table_tag,\n",
    "    template_body,\n",
    "    {\n",
    "        \"workspaceName\": workspace_name,\n",
    "        \"customTableName\": custom_table_name\n",
    "    }\n",
    ")"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "stream_name = ''\n",
    "\n",
    "table_outputs = get_deployment_outputs(rg_deployment_result)\n",
    "if table_outputs is not None:\n",
    "    stream_name = table_outputs[\"streamName\"].get(\"value\")\n",
    "    print('You will need full stream name for future data ingestion!')\n",
    "    print('Stream Name: ' + stream_name)"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 5. Create Data Collection Rule (DCR)"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "# Please replace columns info with your own columns\n",
    "# The custom table template in section 4 declares the same columns; keep the two lists in sync.\n",
    "dcr_json_string = \"\"\"\n",
    "{\n",
    "    \"$schema\": \"https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#\",\n",
    "    \"contentVersion\": \"1.0.0.0\",\n",
    "    \"parameters\": {\n",
    "        \"dataCollectionRuleName\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"Specifies the name of the Data Collection Rule to create.\"\n",
    "            }\n",
    "        },\n",
    "        \"location\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"Specifies the location in which to create the Data Collection Rule.\"\n",
    "            }\n",
    "        },\n",
    "        \"workspaceResourceId\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"Specifies the Azure resource ID of the Log Analytics workspace to use.\"\n",
    "            }\n",
    "        },\n",
    "        \"workspaceName\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"LA workspace name.\"\n",
    "            }\n",
    "        },\n",
    "        \"endpointResourceId\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"Specifies the Azure resource ID of the Data Collection Endpoint to use.\"\n",
    "            }\n",
    "        },\n",
    "        \"customTableFullName\": {\n",
    "            \"type\": \"string\",\n",
    "            \"metadata\": {\n",
    "                \"description\": \"table name.\"\n",
    "            }\n",
    "        }\n",
    "    },\n",
    "    \"resources\": [\n",
    "        {\n",
    "            \"type\": \"Microsoft.Insights/dataCollectionRules\",\n",
    "            \"name\": \"[parameters('dataCollectionRuleName')]\",\n",
    "            \"location\": \"[parameters('location')]\",\n",
    "            \"apiVersion\": \"2021-09-01-preview\",\n",
    "            \"properties\": {\n",
    "                \"dataCollectionEndpointId\": \"[parameters('endpointResourceId')]\",\n",
    "                \"streamDeclarations\": {\n",
    "                    \"[parameters('customTableFullName')]\": {\n",
    "                        \"columns\": [\n",
    "                            {\n",
    "                                \"name\": \"TimeGenerated\",\n",
    "                                \"type\": \"datetime\"\n",
    "                            },\n",
    "                            {\n",
    "                                \"name\": \"TimeInSeconds\",\n",
    "                                \"type\": \"real\"\n",
    "                            },\n",
    "                            {\n",
    "                                \"name\": \"QueryBody\",\n",
    "                                \"type\": \"string\"\n",
    "                            }\n",
    "                        ]\n",
    "                    }\n",
    "                },\n",
    "                \"destinations\": {\n",
    "                    \"logAnalytics\": [\n",
    "                        {\n",
    "                            \"workspaceResourceId\": \"[parameters('workspaceResourceId')]\",\n",
    "                            \"name\": \"[parameters('workspaceName')]\"\n",
    "                        }\n",
    "                    ]\n",
    "                },\n",
    "                \"dataFlows\": [\n",
    "                    {\n",
    "                        \"streams\": [\n",
    "                            \"[parameters('customTableFullName')]\"\n",
    "                        ],\n",
    "                        \"destinations\": [\n",
    "                            \"[parameters('workspaceName')]\"\n",
    "                        ],\n",
    "                        \"transformKql\": \"source\",\n",
    "                        \"outputStream\": \"[parameters('customTableFullName')]\"\n",
    "                    }\n",
    "                ]\n",
    "            }\n",
    "        }\n",
    "    ],\n",
    "    \"outputs\": {\n",
    "        \"dataCollectionRuleId\": {\n",
    "            \"type\": \"string\",\n",
    "            \"value\": \"[resourceId('Microsoft.Insights/dataCollectionRules', parameters('dataCollectionRuleName'))]\"\n",
    "        },\n",
    "        \"immutableId\": {\n",
    "            \"type\": \"string\",\n",
    "            \"value\": \"[reference(resourceId('Microsoft.Insights/dataCollectionRules', parameters('dataCollectionRuleName'))).immutableId]\"\n",
    "        }\n",
    "    }\n",
    "}\n",
    "\"\"\""
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "template_body = json.loads(dcr_json_string)\n",
    "\n",
    "# Built from the user inputs (not from the section 3 outputs) so this section does not\n",
    "# depend on the DCE status cell having reported Succeeded.\n",
    "dce_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Insights/dataCollectionEndpoints/{2}'.format(subscription_id, resource_group_name, data_collection_endpoint_name)\n",
    "endpoint_resource_id = dce_resource_id\n",
    "tag_name = \"defaultdcr\"  # name of the ARM deployment that creates the DCR\n",
    "\n",
    "rg_deployment_result = begin_template_deployment(\n",
    "    tag_name,\n",
    "    template_body,\n",
    "    {\n",
    "        \"location\": location,\n",
    "        \"dataCollectionRuleName\": data_collection_rule_name,\n",
    "        \"workspaceResourceId\": workspace_resource_id,\n",
    "        \"workspaceName\": workspace_name,\n",
    "        \"endpointResourceId\": endpoint_resource_id,\n",
    "        \"customTableFullName\": custom_table_full_name\n",
    "    }\n",
    ")"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "code",
   "source": [
    "immutable_id = ''\n",
    "\n",
    "dcr_outputs = get_deployment_outputs(rg_deployment_result)\n",
    "if dcr_outputs is not None:\n",
    "    immutable_id = dcr_outputs[\"immutableId\"].get(\"value\")\n",
    "    print('You will need DCR Immutable Id for future data ingestion!')\n",
    "    print('DCR Immutable Id: ' + immutable_id)"
   ],
   "outputs": [],
   "execution_count": null,
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "* Once everything is finished, make sure to assign the **Monitoring Metrics Publisher** role on the new DCR to the Microsoft Entra ID application (service principal) that will ingest data."
   ],
   "metadata": {}
  }
 ],
 "metadata": {
  "kernelspec": {
   "name": "synapse_pyspark",
   "display_name": "Synapse PySpark"
  },
  "language_info": {
   "name": "python"
  },
  "description": null,
  "save_output": true,
  "synapse_widget": {
   "version": "0.1",
   "state": {}
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}